

library(pacman)

p_load(tidyverse)

#' Load a survey export and keep only completed, non-terminated responses
#'
#' @param file_path Path to a CSV file that contains the columns
#'   `Q_TerminateFlag` and `Response_Type`.
#' @return A data frame with the rows whose `Response_Type` is "Complete" and
#'   whose `Q_TerminateFlag` is empty (blank string or `NA`).
load_and_filter <- function(file_path) {
  read.csv(file_path) |>
    filter(
      # read.csv() reads a column that contains only blank cells as logical NA,
      # and `NA == ""` is NA, which filter() drops. Treat NA as "no flag" so
      # such a file does not lose every row.
      is.na(Q_TerminateFlag) | Q_TerminateFlag == "",
      Response_Type == "Complete"
    )
}

# Read the raw export of each study through load_and_filter()
study1 <- "study 1 data.csv" |> load_and_filter()
study2 <- "study 2 data.csv" |> load_and_filter()


# Cleaning and Recoding 


# Combine Studies 1 and 2 ---------------------------

# Variables to keep (common to both studies); each name is listed exactly once
vars <- c(
  "study", "tx", "pres_vote",
  "conf_ine_1", "conf_ine_2", "conf_ine_3", "conf_ine_4",
  "inst_trust_1", "inst_trust_4",
  "know_coalition", "know_ine", "know_majority",
  "believe_amlo_1", "believe_amlo_2",
  "sex", "age", "sel"
)

# Study 1: tag every row with its study number, then keep the columns in `vars`
study1_slim <- select(
  mutate(study1, study = 1),
  all_of(vars)
)


# Study 2: tag every row with its study number, then keep the columns in `vars`
# followed by the columns that exist in study 2 only
study2_slim <- select(
  mutate(study2, study = 2),
  all_of(vars),
  c(heard_reform, op_reform, conf_ine_post_1, conf_ine_post_2)
)

# These variables are not in study 1; add them as all-NA columns so that both
# data frames have the same columns before rbind()
study1_slim[
  c("heard_reform", "op_reform", "conf_ine_post_1", "conf_ine_post_2")
] <- NA


# Recode the two pre-treatment trust items in study 2 from text labels to a
# 0-4 numeric scale (per the original note, this variable is already numeric
# in study 1). Labels that are not listed below become NA.
# NOTE(review): most labels contain a double space while "Nada de confianza"
# does not -- confirm every label against the raw export.
study2_slim <- mutate(
  study2_slim,
  across(
    all_of(c("inst_trust_1", "inst_trust_4")),
    \(label) case_match(
      label,
      "Mucha  confianza" ~ 4,
      "Moderada  confianza" ~ 3,
      "Algo de  confianza" ~ 2,
      "Poca  confianza" ~ 1,
      "Nada de confianza" ~ 0
    )
  )
)


# Make sure both data frames have the same columns in the same order.
# A bare `==` comparison only yields a logical vector that nobody checks (and
# that is not even printed when the script is run through source()), so stop
# with an error on any mismatch instead.
stopifnot(identical(colnames(study1_slim), colnames(study2_slim)))

# Combine studies
dat <- rbind(study1_slim, study2_slim)

# Recodes -------------------------------------------

# Recode the main DV items (every column whose name starts with "conf_ine")
# from agreement labels to a 0-4 scale, stored in new "<column>_recode"
# columns. Labels that are not listed below become NA.
# Both spellings of the neutral label ("desauerdo" / "desacuerdo") and both
# capitalisations of "De acuerdo" are accepted, so that a label mismatch does
# not silently turn those answers into NA.
# NOTE(review): confirm the exact labels against the raw exports.
dat <- dat |>
  mutate(
    across(
      starts_with("conf_ine"),
      \(label) case_match(
        label,
        "Totalmente en desacuerdo" ~ 0,
        "En desacuerdo" ~ 1,
        c("Ni de acuerdo ni en desauerdo",
          "Ni de acuerdo ni en desacuerdo") ~ 2,
        c("De acuerdo", "De Acuerdo") ~ 3,
        "Totalmente de acuerdo" ~ 4
      ),
      .names = "{.col}_recode"
    )
  )

# AMLO committed fraud + INE controlled by special interests:
# recode every column whose name starts with "believe_amlo" from agreement
# labels to a 0-4 scale, stored in new "<column>_recode" columns. Labels that
# are not listed below become NA.
# Both spellings of the neutral label ("desacuerdo" / "desauerdo") and both
# capitalisations of "De acuerdo" are accepted, so that a label mismatch does
# not silently turn those answers into NA.
# NOTE(review): confirm the exact labels against the raw exports.
dat <- dat |>
  mutate(
    across(
      starts_with("believe_amlo"),
      \(label) case_match(
        label,
        "Totalmente en desacuerdo" ~ 0,
        "En desacuerdo" ~ 1,
        c("Ni de acuerdo ni en desacuerdo",
          "Ni de acuerdo ni en desauerdo") ~ 2,
        c("De Acuerdo", "De acuerdo") ~ 3,
        "Totalmente de acuerdo" ~ 4
      ),
      .names = "{.col}_recode"
    )
  )


# Build the confidence-in-INE indices as simple averages of the recoded items.
# A row gets NA for an index whenever any of its component items is NA.
dat <- mutate(
  dat,
  # main DV: mean of the four recoded items
  conf_ine = (conf_ine_1_recode + conf_ine_2_recode +
                conf_ine_3_recode + conf_ine_4_recode) / 4,
  # two-question index, pre-debrief
  conf_ine_t1 = (conf_ine_1_recode + conf_ine_2_recode) / 2,
  # two-question index, post-debrief
  conf_ine_t2 = (conf_ine_post_1_recode + conf_ine_post_2_recode) / 2
)

# Store the treatment indicator both as a factor whose reference (first) level
# is "control" and as a plain character copy
dat <- mutate(
  dat,
  tx = relevel(as_factor(tx), ref = "control"),
  tx_char = as.character(tx)
)

# A couple more recodes
dat <- dat |>
  mutate(

    # vote intention; answers that are not listed below become NA
    vote_intent = case_match(
      pres_vote,
      "Claudia Sheinbaum (Morena-PT-PVEM)" ~ "Claudia",
      "Xóchitl Gálvez (PAN-PRI-PRD)" ~ "Xochitl",
      c("No votaría", "Otro") ~ "None/Other"
    ),

    # political knowledge items: correct = 1, incorrect or missing = 0.
    # if_else() returns NA when the condition is NA, so `missing = 0` is what
    # actually maps NA answers to 0.

    # political knowledge 1
    know1 = if_else(
      know_ine == "El Instituto Nacional Electoral (INE)",
      1, 0,
      missing = 0
    ),

    # political knowledge 2
    know2 = if_else(know_coalition == "Xóchitl Gálvez", 1, 0, missing = 0),

    # political knowledge 3
    know3 = if_else(
      know_majority == "MORENA (Movimiento Regeneración Nacional)",
      1, 0,
      missing = 0
    ),

    # political knowledge index: share of the three items answered correctly
    know_index = (know1 + know2 + know3) / 3,

    # copy pre-treatment trust (inst_trust_1) under a descriptive name
    trust_ine_pre = inst_trust_1,

    # copy trust in the president (inst_trust_4) under a descriptive name
    trust_pres_pre = inst_trust_4,

    # gender; any other value becomes NA
    gender = case_match(
      sex,
      "Hombre" ~ "Male",
      "Mujer" ~ "Female",
      .default = NA_character_
    ),

    # socio-economic status: numeric code -> level label (overwrites `sel`);
    # any other value becomes NA
    sel = case_match(
      sel,
      1 ~ "AB",
      2 ~ "C+",
      3 ~ "C",
      4 ~ "C-",
      5 ~ "D+",
      6 ~ "D",
      .default = NA_character_
    )
  )


# Write the cleaned, combined survey data to disk as an RDS file
saveRDS(object = dat, file = "cleaned_svy_dat.Rds")

